In [1]:

    
import itertools as it

import numpy as np
import pandas as pd
import scipy as sp
from scipy import stats

Permutation and Combination

Permutation



In [2]:

    
# r=2 is the length of each permutation: every ordered arrangement of 2 items
# drawn from the 3 available ones.
list(it.permutations([1, 2, 3], r=2))









    Out[2]:





[(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]



In [3]:

    
# Number of 2-permutations of 3 items: 3! / (3 - 2)! = 6.
sum(1 for _ in it.permutations([1, 2, 3], 2))









    Out[3]:





6



In [4]:

    
# Permutations of a sequence that contains repeated items. This is NOT sampling
# with replacement: itertools.permutations treats items as distinct by position,
# so arrangements that look identical (e.g. '122') show up more than once.
["".join(p) for p in it.permutations("122")]









    Out[4]:





['122', '122', '212', '221', '212', '221']



In [5]:

    
# Still 3! = 6 results: the two "2" characters are counted as different items.
sum(1 for _ in it.permutations("122"))









    Out[5]:





6

Combination



In [6]:

    
# Unordered selections of r=2 items out of 3; each item is used at most once.
list(it.combinations([1, 2, 3], r=2))









    Out[6]:





[(1, 2), (1, 3), (2, 3)]



In [7]:

    
# Number of 2-combinations of 3 items: C(3, 2) = 3.
sum(1 for _ in it.combinations([1, 2, 3], 2))









    Out[7]:





3



In [8]:

    
# Combinations with replacement: an item may be picked more than once,
# so pairs such as (1, 1) are included.
list(it.combinations_with_replacement([1, 2, 3], r=2))









    Out[8]:





[(1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]



In [9]:

    
# Count of 2-combinations with replacement from 3 items: C(3 + 2 - 1, 2) = 6.
sum(1 for _ in it.combinations_with_replacement([1, 2, 3], 2))









    Out[9]:





6

Measures of Central Tendency

Arithmetic Mean



In [10]:

    
# Arithmetic mean: sum of the values divided by their count (20 / 5).
np.array([5, 2, 4, 3, 6]).mean()









    Out[10]:





4.0

Weighted Mean



In [11]:

    
# Weighted mean: sum(w_i * x_i) / sum(w_i) = 46 / 11.
np.average(a=[5, 2, 4, 3, 6], weights=[1, 2, 1, 3, 4])









    Out[11]:





4.181818181818182

Harmonic Mean



In [12]:

    
# Harmonic mean: n / sum(1 / x_i).
stats.hmean([5, 2, 4, 3, 6])









    Out[12]:





3.4482758620689657

Geometric Mean



In [13]:

    
# Geometric mean: n-th root of the product of the values (720 ** (1 / 5)).
stats.gmean([5, 2, 4, 3, 6])









    Out[13]:





3.7279192731913513

Median



In [14]:

    
# Even number of values: the median is the mean of the two middle ones, (10 + 24) / 2.
np.median(a=[5, 10, 24, 456])









    Out[14]:





17.0

Mode



In [15]:

    
# Most frequent value and how often it occurs. Called through the explicitly
# imported `stats` submodule, like the other scipy.stats calls in this notebook,
# rather than through attribute access on the top-level `sp` package.
stats.mode([5, 4, 21, 1, 4, 2, 5, 1, 1])









    Out[15]:





ModeResult(mode=array([1]), count=array([3]))

Measures of Dispersion

Range



In [16]:

    
# Define the sample once so the three statistics are guaranteed to use the same data.
samples = [50, 6, 5, 8]

# Minimum
print(np.min(samples))

# Maximum
print(np.max(samples))

# Range = Maximum - Minimum (np.ptp stands for "peak to peak")
np.ptp(samples)

Variance



In [17]:

    
# ddof ("delta degrees of freedom") sets the divisor of the variance to N - ddof:
#   ddof=0 -> divide by N      (population variance)
#   ddof=1 -> divide by N - 1  (sample variance)
np.var(a=[1, 1, 10], ddof=0)









    Out[17]:





18.0

Standard Deviation



In [18]:

    
# Population standard deviation: square root of the variance with divisor N
# (ddof=0 is NumPy's default, spelled out here for clarity).
np.std([20, 1, 5], ddof=0)









    Out[18]:





8.178562764256865

z-score



In [19]:

    
# z-score of each value: (x - mean) / std, using the population std
# (ddof=0 is the default, spelled out here for clarity).
stats.zscore([50, 10, 20], ddof=0)









    Out[19]:





array([ 1.37281295, -0.98058068, -0.39223227])

Quantiles



In [20]:

    
# Quartiles: the default probabilities are 0.25 | 0.50 | 0.75.
# mquantiles uses plotting positions alphap = betap = 0.4 by default, so the
# outer quartiles differ from the linearly interpolated ones pandas reports.
stats.mstats.mquantiles([5, 2, 4, 3, 6])









    Out[20]:





array([2.7, 4. , 5.3])

InterQuartile Range



In [21]:

    
# Interquartile range: 75th percentile minus 25th percentile (5 - 3).
stats.iqr([5, 2, 4, 3, 6])









    Out[21]:





2.0

Five-number summary



In [22]:

    
# describe() reports the five-number summary (min, 25%, 50%, 75%, max) together
# with count, mean and std; the std shown is the sample std (divisor N - 1).
pd.Series(data=[5, 2, 4, 3, 6]).describe()









    Out[22]:





count    5.000000
mean     4.000000
std      1.581139
min      2.000000
25%      3.000000
50%      4.000000
75%      5.000000
max      6.000000
dtype: float64

Measures of Shape

Skewness



In [23]:

    
# Positive skewness: the long tail is on the right (right skewed).
stats.mstats.skew([5, 2, 350, 112, 22, 1000])









    Out[23]:





masked_array(data=1.39788443,
             mask=False,
       fill_value=1e+20)



In [24]:

    
# Negative skewness: the long tail is on the left (left skewed).
stats.mstats.skew([-80, -52, 3, 2])









    Out[24]:





masked_array(data=-0.22189074,
             mask=False,
       fill_value=1e+20)

Kurtosis



In [25]:

    
# Excess kurtosis (Fisher definition, the default): a normal distribution scores 0,
# positive values indicate heavier tails.
stats.mstats.kurtosis([210, 55, 10, 20, 33, 4])









    Out[25]:





0.8096975919315055



In [ ]: